FLOWERS IMAGE CLASSIFIER
We are going to create a model to classify 5 different types of flowers from their images.
Tulips, roses, dandelions, daisies and sunflowers.
The original pictures come from the Kaggle "Flowers Recognition" dataset (the hyperlink was lost in this export).
Although the flowers were categorized in their own subfolders, the file names were all random. So I created a local script to properly rename all the images. Make sure to change the directory accordingly if you're going to be using this script.
import os
import shutil

# Source directory containing one subfolder per flower category.
source_directory = r'C:\Users\nguye\Desktop\flowers'

# Map folder names to category labels. The mapping is the identity here, but
# keeping it explicit documents the expected folders and lets oddly named
# folders be remapped later.
category_mapping = {
    'daisy': 'daisy',
    'dandelion': 'dandelion',
    'rose': 'rose',
    'sunflower': 'sunflower',
    'tulip': 'tulip'
}

# Destination directory that will hold the renamed copies.
destination_directory = r'C:\Users\nguye\Desktop\renamed_flowers'
os.makedirs(destination_directory, exist_ok=True)

# Copy every JPEG into its category subfolder under a sequential name
# '<category>_<index>.jpg'.
for folder_name in os.listdir(source_directory):
    folder_path = os.path.join(source_directory, folder_name)
    # Only process directories that are known flower categories.
    if os.path.isdir(folder_path) and folder_name in category_mapping:
        category = category_mapping[folder_name]
        category_destination = os.path.join(destination_directory, folder_name)
        os.makedirs(category_destination, exist_ok=True)
        # Keep an explicit counter instead of calling os.listdir() on the
        # destination for every file: the old approach re-scanned the folder
        # per image (O(n^2)) and would silently overwrite files on a re-run.
        # sorted() makes the numbering deterministic across platforms.
        index = 0
        for filename in sorted(os.listdir(folder_path)):
            # lower() also accepts '.JPG' files.
            if filename.lower().endswith('.jpg'):
                new_filename = f'{category}_{index}.jpg'
                shutil.copy(os.path.join(folder_path, filename),
                            os.path.join(category_destination, new_filename))
                index += 1

print("Image categorization and renaming completed.")
Next I dumped all the renamed images into a single folder, sorted them alphabetically, and created a CSV file mapping each image to its corresponding label.
import os
import pandas as pd

# Folder containing all renamed flower images, named '<label>_<n>.jpg'.
folder_path = r'C:\Users\nguye\Desktop\renamed_flowers'  # Replace with the actual folder path

# Parallel lists: image file names and their extracted labels.
file_names = []
labels = []

# Iterate in sorted order so the CSV really is alphabetical — os.listdir()
# order is unspecified and platform-dependent.
for filename in sorted(os.listdir(folder_path)):
    # endswith() accepts a tuple, so one call covers all image extensions;
    # lower() also accepts upper-case extensions like '.JPG'.
    if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
        # The label is the filename prefix before the first underscore
        # (format produced by the renaming script: 'label_*.jpg').
        label = filename.split('_')[0]
        file_names.append(filename)
        labels.append(label)

# Two-column table mapping each image file to its flower label.
df = pd.DataFrame({'file_name': file_names, 'Label': labels})

# Save the DataFrame to a CSV file on the desktop.
desktop_path = os.path.expanduser('~/Desktop')  # Get the path to your desktop
csv_file_path = os.path.join(desktop_path, 'flower_label.csv')
df.to_csv(csv_file_path, index=False)
print(f"CSV file '{csv_file_path}' created with labels.")
I compressed the flowers folder into a zip file and uploaded it to google drive along with the csv file. Now I can start working.
pip install imbalanced-learn
Requirement already satisfied: imbalanced-learn in /usr/local/lib/python3.10/dist-packages (0.10.1) Requirement already satisfied: numpy>=1.17.3 in /usr/local/lib/python3.10/dist-packages (from imbalanced-learn) (1.23.5) Requirement already satisfied: scipy>=1.3.2 in /usr/local/lib/python3.10/dist-packages (from imbalanced-learn) (1.11.4) Requirement already satisfied: scikit-learn>=1.0.2 in /usr/local/lib/python3.10/dist-packages (from imbalanced-learn) (1.2.2) Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from imbalanced-learn) (1.3.2) Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from imbalanced-learn) (3.2.0)
# Import necessary libraries
import os
import cv2
import zipfile
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import random
from PIL import Image
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import preprocess_input
from imblearn.over_sampling import SMOTE
import shutil
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
# Connect to google drive
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
# Define paths to necessary files in Google Drive.
# NOTE(review): the CSV created earlier was written as 'flower_label.csv', but
# this reads 'flowers_labels.csv' — presumably it was renamed before the
# upload; verify the Drive filename actually matches.
zip_file_path = '/content/drive/My Drive/flowers.zip'
csv_file_path = '/content/drive/My Drive/flowers_labels.csv'
# Unzip the images archive into a local working directory on the Colab VM.
extracted_dir = '/content/extracted_flowers' # Define the directory to extract the images
os.makedirs(extracted_dir, exist_ok=True)
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extracted_dir)
# Read the CSV file (file names + labels) into a DataFrame.
df = pd.read_csv(csv_file_path)
Now I check to see if the images match their labels by printing 40 random images along with their corresponding labels.
# Randomly select 40 labelled samples and display them so the images can be
# eyeballed against their labels. random_state fixes the sample for
# reproducibility.
num_samples_to_check = 40
samples_to_check = df.sample(n=num_samples_to_check, random_state=42)

# A 5x8 grid exactly fits the 40 samples (the previous 10x10 grid left 60
# cells empty).
plt.figure(figsize=(15, 15))
for i, (index, row) in enumerate(samples_to_check.iterrows()):
    label = row['Label']
    file_name = row['File_name']  # CSV column is capitalized 'File_name'
    # Images were extracted flat into extracted_dir.
    image_path = os.path.join(extracted_dir, file_name)
    if os.path.exists(image_path):
        image = cv2.imread(image_path)
        if image is not None:
            # Shrink to 80x80 so the grid renders quickly, and convert
            # BGR (OpenCV's default channel order) to RGB for matplotlib.
            image = cv2.resize(image, (80, 80))
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            plt.subplot(5, 8, i + 1)
            plt.imshow(image)
            plt.title(f'Label: {label}')
            plt.axis('off')
        else:
            print(f"Failed to read image: {image_path}")
    else:
        print(f"Image not found: {image_path}")
plt.tight_layout()
plt.show()
Everything looks to be correct.
# Double-checking the labels DataFrame: column names, dtypes, non-null counts.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 4317 entries, 0 to 4316 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 File_name 4317 non-null object 1 Label 4317 non-null object dtypes: object(2) memory usage: 67.6+ KB
# Preview the first five rows of the labels DataFrame.
df.head()
| File_name | Label | |
|---|---|---|
| 0 | daisy_0.jpg | daisy |
| 1 | daisy_1.jpg | daisy |
| 2 | daisy_10.jpg | daisy |
| 3 | daisy_100.jpg | daisy |
| 4 | daisy_101.jpg | daisy |
Exploratory Data Analysis
# Visualize how many images each flower category has, to check for class
# imbalance before training.
sns.set_style("whitegrid")
fig = plt.figure(figsize=(10, 6))
ax = sns.histplot(data=df, x='Label', discrete=True)
ax.set_title('Distribution of Flower Categories')
ax.set_xlabel('Flower Categories')
ax.set_ylabel('Frequency')
plt.xticks(rotation=45)  # slant category names so they don't overlap
plt.show()
The dataset is slightly imbalanced. We may have to oversample to get better results.
Data Preprocessing
# Data-augmentation generator. The original config also set rescale=1./255,
# but random_transform() (used below) applies only the geometric transforms —
# rescale takes effect only in flow()/flow_from_*() — so it was dead and
# misleading: the saved images stay in [0, 255] and are normalized exactly
# once, later, before training.
datagen = ImageDataGenerator(
    rotation_range=20,       # randomly rotate images by up to 20 degrees
    width_shift_range=0.1,   # randomly shift images horizontally
    height_shift_range=0.1,  # randomly shift images vertically
    shear_range=0.2,         # apply random shear transformations
    zoom_range=0.2,          # randomly zoom into images
    horizontal_flip=True,    # randomly flip images horizontally
    fill_mode='nearest'      # fill pixels exposed by the transforms
)

# Directory for the resized + augmented copies.
preprocessed_dir = '/content/preprocessed_images'
os.makedirs(preprocessed_dir, exist_ok=True)

# Resize every image to a consistent 100x100 and save one augmented copy.
# NOTE(review): augmentation happens before the train/test split, so the test
# set will also contain augmented images; augmentation is normally applied to
# the training set only — consider splitting first.
for index, row in df.iterrows():
    label = row['Label']
    file_name = row['File_name']  # CSV column is capitalized 'File_name'
    image_path = os.path.join(extracted_dir, file_name)
    if os.path.exists(image_path):
        image = cv2.imread(image_path)
        if image is not None:
            image = cv2.resize(image, (100, 100))
            # Apply one random geometric transform (rotation/shift/shear/...).
            image = datagen.random_transform(image)
            # Save under '<label>_<row-index>.jpg' so the label survives in
            # the filename. imread/imwrite are both BGR, so no conversion.
            preprocessed_image_path = os.path.join(preprocessed_dir, f'{label}_{index}.jpg')
            cv2.imwrite(preprocessed_image_path, image)
        else:
            print(f"Failed to read image: {image_path}")
    else:
        print(f"Image not found: {image_path}")
Let's preview our preprocessed images.
# Spot-check the augmentation by displaying a random handful of the
# preprocessed images with the labels recovered from their filenames.
preprocessed_dir = '/content/preprocessed_images'  # location of the preprocessed images
preprocessed_files = os.listdir(preprocessed_dir)

# Pick 20 files at random (adjust as needed).
num_samples_to_check = 20
random_samples = random.sample(preprocessed_files, num_samples_to_check)

# Lay them out on a 4x5 grid.
plt.figure(figsize=(12, 10))
for position, file_name in enumerate(random_samples, start=1):
    # Filenames follow the 'label_*.jpg' convention, so the label is the
    # prefix before the first underscore.
    label = file_name.split('_')[0]
    img_bgr = cv2.imread(os.path.join(preprocessed_dir, file_name))
    # OpenCV loads BGR; matplotlib expects RGB.
    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    plt.subplot(4, 5, position)
    plt.imshow(img_rgb)
    plt.title(f'Label: {label}')
    plt.axis('off')
plt.tight_layout()
plt.show()
Looks good. Now let's preprocess the data by turning the images and labels into arrays.
# Load every preprocessed image into memory and recover its label from the
# filename, producing two parallel NumPy arrays for training.
preprocessed_files = os.listdir(preprocessed_dir)

images = []
labels = []
for file_name in preprocessed_files:
    # Filenames follow the 'label_*.jpg' convention.
    labels.append(file_name.split('_')[0])
    # OpenCV loads BGR; convert to RGB to match the display convention.
    bgr = cv2.imread(os.path.join(preprocessed_dir, file_name))
    images.append(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB))

# Stack into arrays: images -> (N, 100, 100, 3) uint8, labels -> (N,) str.
images = np.array(images)
labels = np.array(labels)
Now we train/test split the data and one-hot encode the target.
# Split into train/test sets. stratify=labels preserves the per-class
# proportions in both splits — important because the dataset was observed to
# be slightly imbalanced.
X_train, X_test, y_train, y_test = train_test_split(
    images, labels, test_size=0.2, random_state=42, stratify=labels
)

# Normalize pixel values from [0, 255] to [0, 1].
X_train = X_train / 255.0
X_test = X_test / 255.0

# Convert string labels to integer codes (fit on train, reuse on test so the
# encoding is consistent)...
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# ...then one-hot encode for the categorical_crossentropy loss (5 classes).
y_train = to_categorical(y_train_encoded, num_classes=5)
y_test = to_categorical(y_test_encoded, num_classes=5)
Let's train our first model.
# Define the baseline CNN: four Conv -> MaxPool stages that halve the spatial
# size while widening the channels (32 -> 64 -> 128 -> 256), then a dense
# head with dropout and a 5-way softmax (one unit per flower class).
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(100, 100, 3)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(256, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),  # regularize the dense head
    Dense(5, activation='softmax')
])
# categorical_crossentropy matches the one-hot targets built above.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# Train for 20 epochs; a further 20% of the training data is held out for
# per-epoch validation.
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)
# Evaluate the model on the held-out test set.
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f'Test Loss: {test_loss:.4f}')
print(f'Test Accuracy: {test_accuracy:.4f}')
# Predict class probabilities, then argmax to recover predicted class indices
# (and argmax of the one-hot y_test to recover the true indices).
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test, axis=1)
# Overall accuracy on the test set (should match model.evaluate above).
accuracy = accuracy_score(y_true_classes, y_pred_classes)
print(f'Accuracy Score: {accuracy:.4f}')
# Per-class precision/recall/F1, labelled with the encoder's class names.
target_names = label_encoder.classes_
classification_rep = classification_report(y_true_classes, y_pred_classes, target_names=target_names)
print('Classification Report:')
print(classification_rep)
# Confusion matrix: rows are true classes, columns predicted classes.
confusion = confusion_matrix(y_true_classes, y_pred_classes)
# Plot the confusion matrix as an annotated heatmap.
plt.figure(figsize=(8, 6))
sns.set(font_scale=1.2)
sns.heatmap(confusion, annot=True, fmt='d', cmap='Blues', xticklabels=target_names, yticklabels=target_names)
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix')
plt.show()
Epoch 1/20
87/87 [==============================] - 6s 24ms/step - loss: 1.4223 - accuracy: 0.3613 - val_loss: 1.2436 - val_accuracy: 0.4472
Epoch 2/20
87/87 [==============================] - 1s 17ms/step - loss: 1.2554 - accuracy: 0.4627 - val_loss: 1.1409 - val_accuracy: 0.5384
Epoch 3/20
87/87 [==============================] - 1s 17ms/step - loss: 1.1221 - accuracy: 0.5438 - val_loss: 1.1495 - val_accuracy: 0.5557
Epoch 4/20
87/87 [==============================] - 2s 18ms/step - loss: 1.0133 - accuracy: 0.5974 - val_loss: 1.0245 - val_accuracy: 0.6035
Epoch 5/20
87/87 [==============================] - 2s 19ms/step - loss: 0.9552 - accuracy: 0.6271 - val_loss: 0.9577 - val_accuracy: 0.6454
Epoch 6/20
87/87 [==============================] - 2s 23ms/step - loss: 0.9108 - accuracy: 0.6437 - val_loss: 0.8913 - val_accuracy: 0.6744
Epoch 7/20
87/87 [==============================] - 2s 19ms/step - loss: 0.8501 - accuracy: 0.6694 - val_loss: 0.8988 - val_accuracy: 0.6729
Epoch 8/20
87/87 [==============================] - 2s 19ms/step - loss: 0.7891 - accuracy: 0.6995 - val_loss: 0.9134 - val_accuracy: 0.6614
Epoch 9/20
87/87 [==============================] - 2s 21ms/step - loss: 0.7730 - accuracy: 0.7140 - val_loss: 0.9142 - val_accuracy: 0.6614
Epoch 10/20
87/87 [==============================] - 2s 19ms/step - loss: 0.7139 - accuracy: 0.7223 - val_loss: 0.8850 - val_accuracy: 0.6715
Epoch 11/20
87/87 [==============================] - 2s 20ms/step - loss: 0.6561 - accuracy: 0.7404 - val_loss: 0.8871 - val_accuracy: 0.6845
Epoch 12/20
87/87 [==============================] - 1s 15ms/step - loss: 0.6041 - accuracy: 0.7719 - val_loss: 0.9996 - val_accuracy: 0.6570
Epoch 13/20
87/87 [==============================] - 2s 19ms/step - loss: 0.5413 - accuracy: 0.7980 - val_loss: 0.9851 - val_accuracy: 0.6700
Epoch 14/20
87/87 [==============================] - 2s 22ms/step - loss: 0.4484 - accuracy: 0.8331 - val_loss: 0.9916 - val_accuracy: 0.6889
Epoch 15/20
87/87 [==============================] - 2s 28ms/step - loss: 0.4132 - accuracy: 0.8461 - val_loss: 1.0393 - val_accuracy: 0.6975
Epoch 16/20
87/87 [==============================] - 2s 23ms/step - loss: 0.3370 - accuracy: 0.8747 - val_loss: 1.1439 - val_accuracy: 0.6889
Epoch 17/20
87/87 [==============================] - 1s 17ms/step - loss: 0.3084 - accuracy: 0.8845 - val_loss: 1.1210 - val_accuracy: 0.6961
Epoch 18/20
87/87 [==============================] - 1s 15ms/step - loss: 0.2591 - accuracy: 0.9084 - val_loss: 1.4482 - val_accuracy: 0.6614
Epoch 19/20
87/87 [==============================] - 1s 15ms/step - loss: 0.2033 - accuracy: 0.9269 - val_loss: 1.5395 - val_accuracy: 0.6686
Epoch 20/20
87/87 [==============================] - 1s 15ms/step - loss: 0.2048 - accuracy: 0.9283 - val_loss: 1.3548 - val_accuracy: 0.6889
27/27 [==============================] - 0s 9ms/step - loss: 1.3900 - accuracy: 0.6725
Test Loss: 1.3900
Test Accuracy: 0.6725
27/27 [==============================] - 0s 6ms/step
Accuracy Score: 0.6725
Classification Report:
precision recall f1-score support
daisy 0.68 0.71 0.69 157
dandelion 0.66 0.83 0.74 198
rose 0.54 0.57 0.56 154
sunflower 0.83 0.64 0.72 141
tulip 0.70 0.60 0.65 214
accuracy 0.67 864
macro avg 0.68 0.67 0.67 864
weighted avg 0.68 0.67 0.67 864
Bad recall for rose and tulip. Undesirable score for sunflower. Let's try a VGG16 premade model.
# Transfer learning: VGG16 convolutional base with ImageNet weights, no top
# layers, on 100x100x3 inputs.
# NOTE(review): VGG16's ImageNet weights were trained on
# preprocess_input-style inputs (channel-mean-subtracted), while this feeds
# /255-scaled RGB. It still trains, but preprocess_input might score better —
# worth confirming.
input_tensor = Input(shape=(100, 100, 3))
base_model = VGG16(include_top=False, weights='imagenet', input_tensor=input_tensor)
# Attach a fresh classification head for the 5 flower classes.
x = Flatten()(base_model.output)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(5, activation='softmax')(x)
# Full model: frozen VGG16 base + trainable head.
model = Model(inputs=input_tensor, outputs=x)
# Freeze the base so only the new head is trained.
for layer in base_model.layers:
    layer.trainable = False
# categorical_crossentropy matches the one-hot targets.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# Train for 20 epochs with a 20% validation split.
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)
# Evaluate on the held-out test set.
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f'Test Loss: {test_loss:.4f}')
print(f'Test Accuracy: {test_accuracy:.4f}')
# Recover class indices from the probability outputs / one-hot targets.
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test, axis=1)
# Overall test-set accuracy (should match model.evaluate above).
accuracy = accuracy_score(y_true_classes, y_pred_classes)
print(f'Accuracy Score: {accuracy:.4f}')
# Per-class precision/recall/F1, labelled with the encoder's class names.
target_names = label_encoder.classes_
classification_rep = classification_report(y_true_classes, y_pred_classes, target_names=target_names)
print('Classification Report:')
print(classification_rep)
# Confusion matrix: rows are true classes, columns predicted classes.
confusion = confusion_matrix(y_true_classes, y_pred_classes)
# Plot the confusion matrix as an annotated heatmap.
plt.figure(figsize=(8, 6))
sns.set(font_scale=1.2)  # Adjust font size
sns.heatmap(confusion, annot=True, fmt='d', cmap='Blues', xticklabels=target_names, yticklabels=target_names)
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix')
plt.show()
Epoch 1/20
87/87 [==============================] - 6s 57ms/step - loss: 1.2096 - accuracy: 0.5196 - val_loss: 0.8196 - val_accuracy: 0.7221
Epoch 2/20
87/87 [==============================] - 4s 44ms/step - loss: 0.8516 - accuracy: 0.6752 - val_loss: 0.7767 - val_accuracy: 0.7178
Epoch 3/20
87/87 [==============================] - 4s 50ms/step - loss: 0.7126 - accuracy: 0.7350 - val_loss: 0.7673 - val_accuracy: 0.7236
Epoch 4/20
87/87 [==============================] - 4s 51ms/step - loss: 0.6333 - accuracy: 0.7694 - val_loss: 0.6975 - val_accuracy: 0.7410
Epoch 5/20
87/87 [==============================] - 4s 45ms/step - loss: 0.5688 - accuracy: 0.7915 - val_loss: 0.7276 - val_accuracy: 0.7091
Epoch 6/20
87/87 [==============================] - 4s 51ms/step - loss: 0.5011 - accuracy: 0.8110 - val_loss: 0.7506 - val_accuracy: 0.7323
Epoch 7/20
87/87 [==============================] - 4s 43ms/step - loss: 0.4517 - accuracy: 0.8302 - val_loss: 0.6868 - val_accuracy: 0.7453
Epoch 8/20
87/87 [==============================] - 4s 43ms/step - loss: 0.4064 - accuracy: 0.8512 - val_loss: 0.7191 - val_accuracy: 0.7265
Epoch 9/20
87/87 [==============================] - 4s 50ms/step - loss: 0.3909 - accuracy: 0.8595 - val_loss: 0.6928 - val_accuracy: 0.7381
Epoch 10/20
87/87 [==============================] - 4s 52ms/step - loss: 0.3332 - accuracy: 0.8780 - val_loss: 0.7063 - val_accuracy: 0.7381
Epoch 11/20
87/87 [==============================] - 4s 49ms/step - loss: 0.2958 - accuracy: 0.9012 - val_loss: 0.7258 - val_accuracy: 0.7308
Epoch 12/20
87/87 [==============================] - 4s 50ms/step - loss: 0.2452 - accuracy: 0.9211 - val_loss: 0.7468 - val_accuracy: 0.7395
Epoch 13/20
87/87 [==============================] - 4s 45ms/step - loss: 0.2430 - accuracy: 0.9164 - val_loss: 0.7659 - val_accuracy: 0.7294
Epoch 14/20
87/87 [==============================] - 4s 50ms/step - loss: 0.2447 - accuracy: 0.9095 - val_loss: 0.7433 - val_accuracy: 0.7438
Epoch 15/20
87/87 [==============================] - 4s 41ms/step - loss: 0.2149 - accuracy: 0.9301 - val_loss: 0.7833 - val_accuracy: 0.7323
Epoch 16/20
87/87 [==============================] - 4s 41ms/step - loss: 0.1932 - accuracy: 0.9323 - val_loss: 0.8324 - val_accuracy: 0.7337
Epoch 17/20
87/87 [==============================] - 4s 48ms/step - loss: 0.1967 - accuracy: 0.9319 - val_loss: 0.8828 - val_accuracy: 0.7366
Epoch 18/20
87/87 [==============================] - 4s 49ms/step - loss: 0.1624 - accuracy: 0.9450 - val_loss: 0.7981 - val_accuracy: 0.7410
Epoch 19/20
87/87 [==============================] - 4s 48ms/step - loss: 0.1484 - accuracy: 0.9537 - val_loss: 0.8152 - val_accuracy: 0.7467
Epoch 20/20
87/87 [==============================] - 4s 48ms/step - loss: 0.1250 - accuracy: 0.9580 - val_loss: 0.8392 - val_accuracy: 0.7482
27/27 [==============================] - 1s 35ms/step - loss: 0.9454 - accuracy: 0.7396
Test Loss: 0.9454
Test Accuracy: 0.7396
27/27 [==============================] - 1s 31ms/step
Accuracy Score: 0.7396
Classification Report:
precision recall f1-score support
daisy 0.68 0.71 0.69 157
dandelion 0.81 0.79 0.80 198
rose 0.66 0.73 0.70 154
sunflower 0.77 0.70 0.73 141
tulip 0.77 0.75 0.76 214
accuracy 0.74 864
macro avg 0.74 0.74 0.74 864
weighted avg 0.74 0.74 0.74 864
Much better scores across the board. Let's use this model again on an oversampled dataset.
# Rebuild the classification head on top of the frozen VGG16 base.
# NOTE(review): base_model and input_tensor come from the previous cell; the
# previously trained head is discarded and a fresh, untrained one attached.
x = Flatten()(base_model.output)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
x = Dense(5, activation='softmax')(x)
# Full model: frozen VGG16 base + new head.
model = Model(inputs=input_tensor, outputs=x)
# Freeze the base so only the new head is trained.
for layer in base_model.layers:
    layer.trainable = False
# SMOTE operates on 2-D feature matrices, so flatten each 100x100x3 image to
# a 30000-vector, synthesize minority-class samples to balance the classes,
# and reshape back to image tensors before training.
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train.reshape(-1, 100*100*3), y_train_encoded)
# One-hot encode the resampled integer labels.
y_train_resampled = to_categorical(y_train_resampled, num_classes=5)
# Re-encoding y_test is redundant (it was already one-hot earlier) but
# harmless — to_categorical on the same codes gives the same result.
y_test = to_categorical(y_test_encoded, num_classes=5)
# categorical_crossentropy matches the one-hot targets.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# Train on the balanced, resampled data.
# NOTE(review): validation_split carves its 20% out of the resampled array,
# so the validation set contains synthetic SMOTE samples and its scores are
# optimistic; the untouched test-set evaluation below is the reliable number.
history = model.fit(X_train_resampled.reshape(-1, 100, 100, 3), y_train_resampled, epochs=20, batch_size=32, validation_split=0.2)
# Evaluate on the held-out (non-resampled) test set.
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f'Test Loss: {test_loss:.4f}')
print(f'Test Accuracy: {test_accuracy:.4f}')
# Recover class indices from the probability outputs / one-hot targets.
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test, axis=1)
# Overall test-set accuracy (should match model.evaluate above).
accuracy = accuracy_score(y_true_classes, y_pred_classes)
print(f'Accuracy Score: {accuracy:.4f}')
# Per-class precision/recall/F1, labelled with the encoder's class names.
target_names = label_encoder.classes_
classification_rep = classification_report(y_true_classes, y_pred_classes, target_names=target_names)
print('Classification Report:')
print(classification_rep)
# Confusion matrix: rows are true classes, columns predicted classes.
confusion = confusion_matrix(y_true_classes, y_pred_classes)
# Plot the confusion matrix as an annotated heatmap.
plt.figure(figsize=(8, 6))
sns.set(font_scale=1.2)  # Adjust font size
sns.heatmap(confusion, annot=True, fmt='d', cmap='Blues', xticklabels=target_names, yticklabels=target_names)
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix')
plt.show()
Epoch 1/20
107/107 [==============================] - 7s 51ms/step - loss: 1.1208 - accuracy: 0.5700 - val_loss: 1.1556 - val_accuracy: 0.5105
Epoch 2/20
107/107 [==============================] - 5s 46ms/step - loss: 0.8065 - accuracy: 0.6938 - val_loss: 1.0737 - val_accuracy: 0.5632
Epoch 3/20
107/107 [==============================] - 5s 48ms/step - loss: 0.7211 - accuracy: 0.7272 - val_loss: 0.9853 - val_accuracy: 0.5785
Epoch 4/20
107/107 [==============================] - 5s 43ms/step - loss: 0.6221 - accuracy: 0.7746 - val_loss: 1.0855 - val_accuracy: 0.5550
Epoch 5/20
107/107 [==============================] - 5s 47ms/step - loss: 0.5733 - accuracy: 0.7898 - val_loss: 0.5901 - val_accuracy: 0.7693
Epoch 6/20
107/107 [==============================] - 5s 45ms/step - loss: 0.5338 - accuracy: 0.7989 - val_loss: 0.7883 - val_accuracy: 0.6721
Epoch 7/20
107/107 [==============================] - 5s 44ms/step - loss: 0.4662 - accuracy: 0.8203 - val_loss: 0.7044 - val_accuracy: 0.7061
Epoch 8/20
107/107 [==============================] - 5s 43ms/step - loss: 0.3980 - accuracy: 0.8633 - val_loss: 0.7734 - val_accuracy: 0.6803
Epoch 9/20
107/107 [==============================] - 5s 50ms/step - loss: 0.3843 - accuracy: 0.8642 - val_loss: 0.6845 - val_accuracy: 0.7295
Epoch 10/20
107/107 [==============================] - 5s 47ms/step - loss: 0.3485 - accuracy: 0.8747 - val_loss: 0.5317 - val_accuracy: 0.8033
Epoch 11/20
107/107 [==============================] - 5s 46ms/step - loss: 0.3256 - accuracy: 0.8858 - val_loss: 0.7564 - val_accuracy: 0.7225
Epoch 12/20
107/107 [==============================] - 5s 46ms/step - loss: 0.3055 - accuracy: 0.8888 - val_loss: 0.5647 - val_accuracy: 0.7881
Epoch 13/20
107/107 [==============================] - 5s 47ms/step - loss: 0.2781 - accuracy: 0.8996 - val_loss: 0.6575 - val_accuracy: 0.7529
Epoch 14/20
107/107 [==============================] - 5s 46ms/step - loss: 0.2526 - accuracy: 0.9131 - val_loss: 0.5298 - val_accuracy: 0.8009
Epoch 15/20
107/107 [==============================] - 5s 46ms/step - loss: 0.2249 - accuracy: 0.9183 - val_loss: 0.5859 - val_accuracy: 0.7845
Epoch 16/20
107/107 [==============================] - 5s 48ms/step - loss: 0.1956 - accuracy: 0.9335 - val_loss: 0.4223 - val_accuracy: 0.8443
Epoch 17/20
107/107 [==============================] - 5s 47ms/step - loss: 0.1813 - accuracy: 0.9379 - val_loss: 0.6981 - val_accuracy: 0.7564
Epoch 18/20
107/107 [==============================] - 4s 42ms/step - loss: 0.1786 - accuracy: 0.9385 - val_loss: 0.7322 - val_accuracy: 0.7482
Epoch 19/20
107/107 [==============================] - 5s 47ms/step - loss: 0.1903 - accuracy: 0.9297 - val_loss: 0.6421 - val_accuracy: 0.7752
Epoch 20/20
107/107 [==============================] - 5s 44ms/step - loss: 0.1762 - accuracy: 0.9385 - val_loss: 0.5112 - val_accuracy: 0.8326
27/27 [==============================] - 1s 35ms/step - loss: 0.9021 - accuracy: 0.7512
Test Loss: 0.9021
Test Accuracy: 0.7512
27/27 [==============================] - 1s 30ms/step
Accuracy Score: 0.7512
Classification Report:
precision recall f1-score support
daisy 0.69 0.71 0.70 157
dandelion 0.77 0.86 0.81 198
rose 0.69 0.73 0.71 154
sunflower 0.81 0.70 0.75 141
tulip 0.79 0.73 0.76 214
accuracy 0.75 864
macro avg 0.75 0.75 0.75 864
weighted avg 0.75 0.75 0.75 864
Huge improvement for dandelion. Can we continue messing around and get better results? Most definitely. But I'm going to end things here.
%%shell
jupyter nbconvert /content/drive/MyDrive/Colab\ Notebooks/Flowers_Prediction.ipynb --to html
[NbConvertApp] Converting notebook /content/drive/MyDrive/Colab Notebooks/Flowers_Prediction.ipynb to html [NbConvertApp] Writing 2935539 bytes to /content/drive/MyDrive/Colab Notebooks/Flowers_Prediction.html